if(interactive()){
  print(date())
  print("This was run interactively. Please run with rmarkdown::render")
}else{
  print(paste0("This notebook was compiled on ", date()))
}
## [1] "This notebook was compiled on Tue Nov 23 00:16:31 2021"
library(projectR)
library(ggplot2)
library(dplyr)
library(monocle3)
library(here)
library(furrr)
library(purrr)
library(EMDomics)
source(here("scripts/accessory_functions/monocle_mods.R"))
source(here("scripts/accessory_functions/pattern_plotting.R"))
source(here("scripts/accessory_functions/patternFeatureCorrelationHeatmap.R"))
source(here("scripts/accessory_functions/convertHomologs.R"))
# none, cell_size_log, log
norm_method <- "cell_size_log"

#declare patterns of interest
MENS_patterns <- c(16,27,32,41)
select_patterns <- c(MENS_patterns)


print(norm_method)
## [1] "cell_size_log"
print(select_patterns)
## [1] 16 27 32 41
###Load in projections ----
#teichman_gut_atlas_filename <- 
teichman_mesenchymal_atlas_filename <- "/data/users/jared/atlas_processing/gut_cell_atlas/data/mesenchymal_gut.Rds"

cds <- readRDS(teichman_mesenchymal_atlas_filename)

#Previously learned gene weights from NMF on logscaled expression counts
lmmp_6mo_patttern_geneweights_filename <- here("results/NMF/lmmp/old_pattern_run/50dims/pattern_gene_weights.csv")
learned_weights <- read.csv(
  file = lmmp_6mo_patttern_geneweights_filename,
  row.names = 1)

npattern <- dim(learned_weights)[2]

Gut atlas is human, patterns are defined in mouse, so convert genes to human

homologs <- readRDS(here("teichman_gut_atlas/data/homologs.rds"))
# no_match <- tfs[!(tfs %in% homologs$Gene.name.human)]

#1 to 1 matches, NA if not present in dataset
matched_homologs <- convertHomologs(rownames(learned_weights), rownames(cds), homolog.table = homologs)
## detected inputs from MOUSE with id type Gene.name
## reference rownames detected HUMAN with id type Gene.name
## Found 15023 out of 22794 total inputs in conversion table
colnames(matched_homologs) <- c("mouse_gene_id","human_gene_id") 

matched_homologs_filt <- matched_homologs[!is.na(matched_homologs$human_gene_id),]

learned_weights_filt <- learned_weights[matched_homologs_filt$mouse_gene_id,]
rownames(learned_weights_filt) <- matched_homologs_filt$human_gene_id

#Now both are human genes, take intersection
genes_to_use <- intersect(rownames(learned_weights_filt), rownames(cds))

cds_filt <- cds[genes_to_use,]
learned_weights_filt <- learned_weights_filt[genes_to_use,]
load.existing.projection.values <- 1
if(load.existing.projection.values){
  #first column is cell barcodes, rest of columns are patterns
  transferred_cell_weights <- read.csv(here("teichman_gut_atlas/data/gut_atlas_in_6mo_LMMP_old_patterns.csv"),
                       row.names = 1)
}else{
  set.seed(131)
  #TODO: update to switch statement
  if(norm_method == "none"){ exprs_mat <- as.matrix(exprs(cds))}
  if(norm_method == "log"){ exprs_mat <- as.matrix(log10(exprs(cds)+1))}
  if(norm_method == "cell_size_log"){ 
    exprs_mat <- normalized_counts(cds_filt) 
    
  }   
  
  #sparse matrix too large to convert to dense, so break into chunks
  n_by <- 40000
  n <- dim(cds_filt)[2]
 
  breaks <- seq(from = 0, to = n, by= n_by)
  if(breaks[length(breaks)] != n){
    breaks <- c(breaks, dim(cds_filt)[2])
  }
  breaks
  
  exprs_list <- list()
  for(i in c(1:(length(breaks)-1))){
    #subset
    exprs_mat_sub <- exprs_mat[, seq(breaks[i]+1, breaks[i+1], by = 1)]
    exprs_list <- c(exprs_list, exprs_mat_sub)
  }
  names(exprs_list) <- paste0("sub", 1:(length(breaks)-1))
  
  #projection
  transferred_cell_weights_list <- furrr::future_map(exprs_list, function(mat){
    transferred_cell_wts_sub <- t(projectR::projectR(data = as.matrix(mat),
                                                 loadings = as.matrix(learned_weights_filt)))
    
    colnames(transferred_cell_wts_sub) = paste0("cellPattern",1:npattern)
    
    return(transferred_cell_wts_sub)
    
  })
  
  transferred_cell_weights <- do.call(rbind, transferred_cell_weights_list)
  
  write.csv(transferred_cell_weights,paste0(here("teichman_gut_atlas/data/gut_atlas_in_6mo_LMMP_old_patterns.csv")),
            quote = F, row.names = T)
}
#Bind pattern weights to object, make sure cells are exactly the same. Could left_merge but pData not being a true data.frame is an issue
assertthat::assert_that(all(rownames(pData(cds)) == rownames(transferred_cell_weights)),
                        msg = "Cells in projection weights matrix are not the same as cells in cds object")
## [1] TRUE
pData(cds) <- cbind(pData(cds), transferred_cell_weights)
pData(cds)[,"log10UMI"] <- log10(pData(cds)[,"total_counts"] +1)

pattern_wt_plots_select <- lapply(select_patterns, plotCellPatterns, cds_obj = cds, do.clip = c(0.01,0.99))
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
pattern_wt_plots_select
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

pattern_wt_plots <- lapply( 1:npattern, plotCellPatterns, cds_obj = cds, do.clip = c(0.01,0.99))
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## No trajectory to plot. Has learn_graph() been called yet?
## Cells aren't colored in a way that allows them to be grouped.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
pattern_wt_plots[1:5]
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

#calculate UMAP based on projected pattern weights instead of PCA
do.reembed <- 0
if (do.reembed){
  features_of_interest <- transferred_cell_wts[,paste0("cellPattern",select_patterns)]
  umap_res <- uwot::umap(as.matrix(features_of_interest),
                         metric="cosine",
                         min_dist = 0.1,
                         nn_method = "annoy")
  
  reducedDims(cds)$NMF_UMAP <- umap_res
  
  plot_cells_mod(cds, reduction_method = "NMF_UMAP", color_cells_by = "clusters", label_cell_groups = F) + 
    ggtitle(paste0("UMAP embedding based on projections of select ENS patterns: ", 
                   paste(select_patterns, collapse = ",")))
  
  p_wt_plots <-lapply(select_patterns, plotCellPatterns, cds, red_method = "NMF_UMAP", do.clip = c(0.01,0.99))
  
}      
plot_genes_by_group(cds, markers = toupper(c("Msln","Cdh3","Fmo2","Slpi","C3","Wt1","Upk3b","Slc17a9"))) + coord_flip() + theme(axis.text.y = element_text(size = 6))
## Warning in if (ordering_type == "cluster_row_col") {: the condition has length >
## 1 and only the first element will be used
## Warning in if (axis_order == "marker_group") {: the condition has length > 1 and
## only the first element will be used

pattern_usage_plots <- lapply(paste0("cellPattern",select_patterns), plotPatternUsageByCondition, cds, bin_by = "Age") 
pattern_usage_plots <- lapply(pattern_usage_plots, function(x){
  x + theme(axis.text.x = element_text(angle = 90))
})
pattern_usage_plots[[1]] + theme(axis.text.x = element_text(angle = 90))

pData(cds)$Age <- factor(pData(cds)$Age, levels = c("6.1Wk", "6.7Wk", "6.9Wk", "7.4Wk" ,"7.9Wk", "8.4Wk", "9.2Wk" ,"9.9Wk" ,"10.2Wk", "10Wk","11.1Wk","12Wk","15Wk", "16Wk", "17Wk", "4","6","9","10","11","12","13","14", "20-25", "25-30", "45-50", "60-65", "65-70", "70-75"))

#for each pattern, calculate average pattern usage by group, then add to everycell in colData
#Also change ages from "Wk" to "PCW"
cell_names <- colnames(cds)
pData(cds) <- pData(cds) %>%
  as.data.frame() %>%
  group_by(Age) %>%
  mutate(across(starts_with("cellPattern"), mean, .names = "{.col}_mean")) %>%
  ungroup() %>%
  mutate(Age = factor(Age, labels = stringr::str_replace_all(levels(Age), "Wk", "PCW"))) %>%
  DataFrame()
colnames(cds) <- cell_names


pattern_usage_plots_select <- lapply(paste0("cellPattern",select_patterns), FUN = function(x){
  
  plotPatternUsageByCondition(x, cds, bin_by = "Age") + theme(axis.text.x = element_text(angle = 90))

})
pattern_usage_plots_select
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

pattern_usage_plots <- lapply(paste0("cellPattern",1:npattern), FUN = function(x){
  
  plotPatternUsageByCondition(x, cds, bin_by = "Age") + theme(axis.text.x = element_text(angle = 90))

})
pattern_usage_plots[1:5]
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

fetal_ages <- levels(pData(cds)[,"Age"])[1:15]
juvenile_ages <- levels(pData(cds)[,"Age"])[16:23]
adult_ages <- levels(pData(cds)[,"Age"])[14:29]

#group by rough age, instead of ~30 timepoints
pData(cds) <- pData(cds) %>%
  as.data.frame() %>% 
  mutate(Age_binned = case_when(
    Age %in% fetal_ages ~ "fetal",
    Age %in% juvenile_ages ~ "juvenile",
    Age %in% adult_ages ~ "adult"
  )) %>% 
  mutate(Age_binned = factor(Age_binned, levels = c("fetal", "juvenile", "adult"))) %>%
  DataFrame()
transferred_cell_weights_filt <- transferred_cell_weights %>%
  select(all_of(paste0("cellPattern", 1))) %>% t()

cell_sample <- sample(rownames(transferred_cell_weights), 5000)
labels <- pData(cds)[cell_sample, "Age_binned"] %>%
  as.character() %>%
  setNames(cell_sample)

transferred_cell_weights_sampled <- transferred_cell_weights_filt[,cell_sample]
emd_res <- calculate_emd(transferred_cell_weights_sampled, labels, binSize = 0.001)

emdist::emd(as.matrix(transferred_cell_weights_filt[labels == "adult"]), as.matrix(transferred_cell_weights_filt[labels == "fetal"]))

map(select_patterns, function(pattern_no){
  pattern_name <- paste0("cellPattern", pattern_no)
 

  
  pl <- ggplot(as.data.frame(pData(cds))) +
    geom_density(aes_string(x = pattern_name, color = "Age_binned"))
  
  
})
n_genes <- 200
pattern_list <- map(paste0("cellPattern", MENS_patterns), function(pattern){
  pattern_arranged <- learned_weights[order(learned_weights[,pattern, drop = F],
                                            decreasing = T),] %>%
    .[,pattern, drop = F] %>%
    slice_head(n = n_genes) %>%
    tibble::rownames_to_column(var = "gene_name") %>%
    setNames(paste0(pattern, c("gene names", "gene_weights")))
  pattern_arranged
})
pattern_df <- bind_cols(pattern_list)

write.csv(pattern_df, here("results/NMF/lmmp/old_pattern_run/50dims/MENS_patterns_top_200_genes.csv"))

saveRDS(cds, here("teichman_gut_atlas/data/mesenchymal_gut_annotated.Rds"))